library(ggplot2)
library(vegan)
## 载入需要的程辑包:permute
## 载入需要的程辑包:lattice
## This is vegan 2.6-2
library(colorRamps)
library(psych)
##
## 载入程辑包:'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(tidyr)
library(readxl)
library(ape)
library(ggplot2)
library(colorRamps)
library(ggrepel)
rm(list = ls())
load("data/CForBio.data.prep.2022.11.22.RData")
library(vegan)
cutoff <- 1000
sort(colSums(bac.amp1))
## BD1623 DL1904 NG14 XS0601 BTM1515 GT0910 NG7 BD1512 TT1419 CB2424
## 24903 26972 27053 27234 29143 31098 33452 34090 34339 34770
## GT2515 TT0302 DHS0824 HS1433 HS1531 TT1201 HS2325 BTM2317 GT0206 DHS0911
## 34826 35210 35306 35369 35611 35751 36395 36829 36923 37867
## LS11 DHS0808 BTM1323 NG8 CB2009 XS0809 DL0810 LS9 DL0906 XS2022
## 39954 41573 42171 42958 43395 45875 45977 46174 46676 47364
## GH1 GH4 BD1203 LS15 CB0122 GH2
## 48535 54597 55685 60392 65051 65829
sort(colSums(bac.mgm1))
## HS1433 BD1623 NG7 XS2022 DHS0824 DHS0808 DL1904 BTM1323
## 9115574 9597402 9667544 9737388 9875341 10013817 10016208 10110020
## TT1419 HS2325 GT0910 DL0810 GT2515 GT0206 TT0302 NG8
## 10160188 10164701 10195067 10301253 10395324 10534807 10643744 10648853
## XS0809 HS1531 LS11 CB2424 BTM2317 XS0601 GH1 DL0906
## 10675545 10684100 10970718 11115704 11143274 11320447 11461977 11544870
## BD1512 GH2 NG14 DHS0911 CB0122 CB2009 BD1203 TT1201
## 11738899 11750163 11847992 11987853 12237915 12559810 12632900 12911304
## LS9 LS15 GH4 BTM1515
## 13903753 14521553 14922282 15939544
sort(colSums(ko.mgm1))
## HS1433 HS1531 BD1512 HS2325 DHS0911 BD1203 CB0122 XS2022
## 516612.0 554923.2 561838.9 564412.5 570647.8 572166.7 578939.1 581887.7
## BD1623 TT1419 GH4 XS0809 CB2009 TT0302 DHS0824 GT0206
## 585609.5 588950.4 590928.8 591823.2 595951.2 597472.6 597523.9 605316.2
## GT0910 DHS0808 XS0601 TT1201 LS9 LS15 NG8 GT2515
## 607017.0 611526.6 612949.0 618809.8 618974.8 621618.7 623901.4 624726.0
## NG14 NG7 BTM1323 BTM1515 LS11 BTM2317 CB2424 GH2
## 626956.5 630290.3 640564.2 640670.3 653233.2 657988.6 661383.3 664885.4
## GH1 DL0906 DL0810 DL1904
## 670864.8 682216.2 709302.7 712869.3
sort(colSums(cog.mgm1))
## HS1433 BD1512 CB0122 DHS0911 HS1531 BD1623 HS2325 GH4
## 665137.6 700954.6 701771.9 706113.6 707706.2 717680.3 719426.8 719907.9
## BD1203 TT1419 XS2022 CB2009 XS0809 NG8 TT0302 LS9
## 720810.2 735415.6 736115.9 736728.3 737294.4 738141.8 741127.0 741129.8
## DHS0824 NG14 GT0206 NG7 LS15 GT0910 DHS0808 XS0601
## 742211.5 745558.0 745871.5 746178.6 747159.8 748678.5 749599.8 753834.3
## BTM2317 BTM1323 BTM1515 TT1201 LS11 GT2515 CB2424 DL0906
## 761668.5 762288.9 766129.1 769869.2 772483.2 780307.3 785374.0 785610.6
## GH2 GH1 DL1904 DL0810
## 806012.5 810165.7 822934.2 825360.4
sort(colSums(rgi.mgm1))
## GH4 CB0122 CB2009 NG8 BD1623 NG7 NG14 DL0906
## 20995.21 23022.58 24293.34 24840.47 25081.28 25099.30 25294.91 25324.26
## BD1512 LS15 HS1433 LS9 TT1419 LS11 BD1203 BTM2317
## 25426.07 25842.94 25856.23 25965.01 26532.75 26701.09 26808.88 27092.64
## DHS0911 TT0302 XS0601 XS0809 CB2424 XS2022 GT0206 BTM1515
## 27105.60 27138.77 27223.78 27593.08 27629.52 27753.26 27775.72 27778.20
## GT0910 TT1201 GH2 DL0810 DL1904 HS2325 DHS0824 DHS0808
## 28043.34 28054.02 28314.02 28403.45 28474.85 28496.89 28701.60 28718.88
## GH1 HS1531 BTM1323 GT2515
## 28730.37 29012.93 29564.26 30208.16
sort(colSums(cazy.mgm1))
## BD1623 GH4 BD1512 NG8 NG7 NG14 CB0122 XS0601
## 13535.55 13791.28 14072.59 14476.19 14543.41 14716.72 14718.56 14858.62
## CB2009 BD1203 GT0910 GT0206 DL0906 CB2424 LS9 LS15
## 15158.24 15372.90 15421.68 15423.15 15490.17 15512.52 15621.44 15661.32
## LS11 BTM2317 XS0809 HS1433 HS2325 XS2022 DL1904 DL0810
## 15742.96 15933.04 16022.71 16071.16 16630.05 16715.92 16996.28 17006.50
## GH2 GT2515 DHS0911 GH1 HS1531 BTM1515 TT1419 DHS0824
## 17012.91 17096.55 17106.44 17144.00 17227.44 17361.87 17600.44 17729.42
## BTM1323 TT0302 TT1201 DHS0808
## 17929.15 18253.39 18461.59 18792.74
bac.amp <- rrarefy(t(bac.amp1),sample = 24903 )
bac.mgm <- data.frame(t(bac.mgm1))
ko.mgm <- data.frame(t(ko.mgm1))
cog.mgm <- data.frame(t(cog.mgm1))
rgi.mgm <- data.frame(t(rgi.mgm1))
cazy.mgm <- data.frame(t(cazy.mgm1))
env.amp1$rgi.mgm.H <- vegan::diversity(rgi.mgm)
env.amp1$cazy.mgm.H <- vegan::diversity(cazy.mgm)
env.amp1$ko.mgm.H <- vegan::diversity(ko.mgm)
env.amp1$cog.mgm.H <- vegan::diversity(cog.mgm)
env.amp1$bac.mgm.H <- vegan::diversity(bac.mgm)
env.amp1$bac.amp.H <- vegan::diversity(bac.amp)
env.amp1$rgi.mgm.S <- vegan::specnumber(rgi.mgm)
env.amp1$cazy.mgm.S <- vegan::specnumber(cazy.mgm)
env.amp1$ko.mgm.S <- vegan::specnumber(ko.mgm)
env.amp1$cog.mgm.S <- vegan::specnumber(cog.mgm)
env.amp1$bac.mgm.S <- vegan::specnumber(bac.mgm)
env.amp1$bac.amp.S <- vegan::specnumber(bac.amp)
env.amp1$cog.mgm.sum <- rowSums(cog.mgm)
env.amp1$rgi.mgm.sum <- rowSums(rgi.mgm)
env.amp1$cazy.mgm.sum <- rowSums(cazy.mgm)
env.amp1$ko.mgm.sum <- rowSums(ko.mgm)
env.amp1$bac.mgm.sum <- rowSums(bac.mgm)
#genomeTraits
genomeTrait<-read_excel("data/genomeTrait/genomeTraits_contigs500.xlsx",sheet = "Sheet1")
genomeTrait$sample_name == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.genomeTraits<-cbind(env.amp1,genomeTrait[,c(3:18)])
#Supplementary Fig. 1
set.seed(315)
bac.amp.pc<-pcoa(vegdist(decostand(bac.amp,"hellinger"),"bray"))
env.amp.pc<-data.frame(bac.amp.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
bac.amp.pc1=round(bac.amp.pc$values$Relative_eig[1], 3)*100
bac.amp.pc2=round(bac.amp.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
ef<-envfit(bac.amp.pc$vectors, env.sub, na.rm = TRUE); ef
##
## ***VECTORS
##
## Axis.1 Axis.2 r2 Pr(>r)
## Latitude -0.58162 0.81346 0.7411 0.001 ***
## Longitude -0.39227 0.91985 0.3107 0.003 **
## Altitude -0.72091 0.69303 0.2809 0.005 **
## MAP 0.84119 -0.54073 0.7209 0.001 ***
## MAT 0.57514 -0.81805 0.6895 0.001 ***
## TC -0.40909 0.91249 0.3319 0.003 **
## TN -0.91344 0.40698 0.2067 0.028 *
## TP -0.99964 0.02686 0.4952 0.001 ***
## pH -0.63857 -0.76956 0.7395 0.001 ***
## ACa -0.91707 -0.39872 0.6518 0.001 ***
## AMg -0.61705 -0.78692 0.6484 0.001 ***
## AFe -0.28103 -0.95970 0.4618 0.001 ***
## AK 0.30253 0.95314 0.0971 0.195
## C_N 0.04331 0.99906 0.4170 0.001 ***
## C_P 0.89631 0.44342 0.5332 0.001 ***
## N_P 0.97768 0.21009 0.5716 0.001 ***
## Soil bulk density 0.51270 0.85857 0.3073 0.003 **
## Plant abundance 0.67471 -0.73809 0.1116 0.123
## Plant basal area 0.11058 0.99387 0.1346 0.105
## Plant richness 0.75771 -0.65259 0.5813 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
bac.amp.ef.arrows<-data.frame(ef$vectors$arrows)
bac.amp.ef.arrows$Variable <- row.names(bac.amp.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))
gp1<-ggplot(env.amp.pc) +
geom_point(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
labs(x = sprintf("PCo1 (%.1f%%)", bac.amp.pc1), y = sprintf("PCo2 (%.1f%%)", bac.amp.pc2))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
ggtitle("bac.amp")+
geom_segment(data = bac.amp.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
geom_text_repel(data = bac.amp.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3,colour="black")+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()
gp1

#ggsave("pdf2/Fig.S5a_2.PcoA.bact.composition.2023.01.05.pdf", width = 6, height = 4)
set.seed(315)
bac.mgm.pc<-pcoa(vegdist(decostand(bac.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(bac.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
bac.mgm.pc1=round(bac.mgm.pc$values$Relative_eig[1], 3)*100
bac.mgm.pc2=round(bac.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
ef<-envfit(bac.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
##
## ***VECTORS
##
## Axis.1 Axis.2 r2 Pr(>r)
## Latitude 0.77294 -0.63448 0.2325 0.016 *
## Longitude 0.90654 0.42211 0.0220 0.695
## Altitude 0.25494 -0.96696 0.2096 0.023 *
## MAP -0.89232 0.45141 0.5233 0.001 ***
## MAT -0.75999 0.64993 0.2027 0.027 *
## TC 0.82505 -0.56507 0.0352 0.536
## TN 0.99229 0.12393 0.1485 0.080 .
## TP 0.99299 -0.11818 0.5525 0.001 ***
## pH 0.99538 0.09598 0.8623 0.001 ***
## ACa 0.99323 0.11613 0.8357 0.001 ***
## AMg 0.99165 0.12894 0.7697 0.001 ***
## AFe 0.96214 0.27254 0.4073 0.001 ***
## AK -0.99684 0.07940 0.1232 0.118
## C_N -0.83275 -0.55365 0.1489 0.084 .
## C_P -0.98222 0.18771 0.5696 0.001 ***
## N_P -0.96809 0.25059 0.5316 0.001 ***
## Soil bulk density -0.85769 0.51417 0.2538 0.011 *
## Plant abundance -0.75417 0.65667 0.0951 0.182
## Plant basal area -0.42044 -0.90732 0.2931 0.008 **
## Plant richness -0.98393 0.17853 0.3281 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
bac.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
bac.mgm.ef.arrows$Variable <- row.names(bac.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))
gp2<-ggplot(env.amp.pc) +
geom_jitter(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
labs(x = sprintf("PCo1 (%.1f%%)", bac.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", bac.mgm.pc2))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
ggtitle("bac.mgm")+
geom_segment(data = bac.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
geom_text_repel(data = bac.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()
gp2

#ggsave("pdf2/Fig.S5b.PcoA.bact.composition.2022.12.29.pdf", width = 6, height = 4)
set.seed(315)
ko.mgm.pc<-pcoa(vegdist(decostand(ko.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(ko.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
ko.mgm.pc1=round(ko.mgm.pc$values$Relative_eig[1], 3)*100
ko.mgm.pc2=round(ko.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
ef<-envfit(ko.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
##
## ***VECTORS
##
## Axis.1 Axis.2 r2 Pr(>r)
## Latitude 0.33207 -0.94325 0.8185 0.001 ***
## Longitude 0.20850 -0.97802 0.3417 0.001 ***
## Altitude 0.21586 -0.97642 0.2179 0.021 *
## MAP -0.61804 0.78614 0.7433 0.001 ***
## MAT -0.30433 0.95257 0.7992 0.001 ***
## TC 0.27507 -0.96143 0.3210 0.002 **
## TN 0.80660 -0.59110 0.2397 0.010 **
## TP 0.87791 -0.47883 0.6641 0.001 ***
## pH 0.99839 0.05665 0.8542 0.001 ***
## ACa 0.94745 -0.31991 0.8691 0.001 ***
## AMg 0.97584 0.21849 0.7660 0.001 ***
## AFe 0.68100 0.73228 0.5953 0.001 ***
## AK -0.53732 -0.84338 0.2051 0.025 *
## C_N -0.25060 -0.96809 0.5882 0.001 ***
## C_P -0.99973 0.02317 0.5825 0.001 ***
## N_P -0.93871 0.34472 0.5918 0.001 ***
## Soil bulk density -0.99807 -0.06204 0.2029 0.023 *
## Plant abundance -0.82764 0.56127 0.0713 0.276
## Plant basal area -0.49375 -0.86960 0.2786 0.009 **
## Plant richness -0.51766 0.85559 0.6935 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
ko.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
ko.mgm.ef.arrows$Variable <- row.names(ko.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))
gp3<-ggplot(env.amp.pc) +
geom_point(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
labs(x = sprintf("PCo1 (%.1f%%)", ko.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", ko.mgm.pc2))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
ggtitle("ko.mgm")+
geom_segment(data = ko.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
geom_text_repel(data = ko.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()
gp3

#ggsave("pdf2/Fig.S5c.PcoA.KO.composition.2023.01.05.pdf", width = 6, height = 4)
set.seed(315)
rgi.mgm.pc<-pcoa(vegdist(decostand(rgi.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(rgi.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
rgi.mgm.pc1=round(rgi.mgm.pc$values$Relative_eig[1], 3)*100
rgi.mgm.pc2=round(rgi.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
ef<-envfit(rgi.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
##
## ***VECTORS
##
## Axis.1 Axis.2 r2 Pr(>r)
## Latitude 0.42041 -0.90733 0.6997 0.001 ***
## Longitude 0.36617 -0.93055 0.2295 0.014 *
## Altitude 0.16282 -0.98666 0.3524 0.003 **
## MAP -0.66560 0.74631 0.7434 0.001 ***
## MAT -0.39123 0.92029 0.6587 0.001 ***
## TC 0.48621 -0.87384 0.2164 0.014 *
## TN 0.95321 -0.30231 0.2706 0.006 **
## TP 0.93787 -0.34698 0.7011 0.001 ***
## pH 0.99811 -0.06138 0.7752 0.001 ***
## ACa 0.93096 -0.36513 0.8717 0.001 ***
## AMg 0.99595 0.08995 0.7199 0.001 ***
## AFe 0.73353 0.67966 0.5364 0.001 ***
## AK -0.59416 -0.80435 0.1883 0.033 *
## C_N -0.31710 -0.94839 0.3682 0.001 ***
## C_P -0.99648 0.08382 0.6073 0.001 ***
## N_P -0.94882 0.31582 0.6166 0.001 ***
## Soil bulk density -0.95857 0.28486 0.1732 0.045 *
## Plant abundance -0.72661 0.68705 0.0952 0.182
## Plant basal area -0.52424 -0.85157 0.2682 0.010 **
## Plant richness -0.60895 0.79321 0.6717 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
rgi.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
rgi.mgm.ef.arrows$Variable <- row.names(rgi.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))
gp4<-ggplot(env.amp.pc) +
geom_point(mapping = aes(x = Axis.1, y = Axis.2, colour = site.latitude),size=3, alpha = 0.9) +
labs(x = sprintf("PCo1 (%.1f%%)", rgi.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", rgi.mgm.pc2))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
ggtitle("arg.mgm")+
geom_segment(data = rgi.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
geom_text_repel(data = rgi.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()
gp4

#ggsave("pdf2/Fig.S5d.PcoA.ARG.composition.2023.01.05.pdf", width = 6, height = 4)
set.seed(315)
cazy.mgm.pc<-pcoa(vegdist(decostand(cazy.mgm,"hellinger"),"bray"))
env.amp.pc<-data.frame(cazy.mgm.pc$vectors[,c("Axis.1", "Axis.2")], env.amp1)
cazy.mgm.pc1=round(cazy.mgm.pc$values$Relative_eig[1], 3)*100
cazy.mgm.pc2=round(cazy.mgm.pc$values$Relative_eig[2], 3)*100
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")];
colnames(env.sub) <- c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
ef<-envfit(cazy.mgm.pc$vectors, env.sub, na.rm = TRUE); ef
##
## ***VECTORS
##
## Axis.1 Axis.2 r2 Pr(>r)
## Latitude 0.56039 -0.82823 0.4734 0.001 ***
## Longitude 0.50368 -0.86389 0.1369 0.083 .
## Altitude 0.22787 -0.97369 0.3392 0.003 **
## MAP -0.89544 0.44519 0.5415 0.001 ***
## MAT -0.53433 0.84528 0.4425 0.001 ***
## TC 0.46764 -0.88392 0.2316 0.010 **
## TN 0.84644 -0.53249 0.3241 0.003 **
## TP 0.97370 -0.22783 0.6940 0.001 ***
## pH 0.88977 0.45641 0.8580 0.001 ***
## ACa 0.95824 0.28597 0.8356 0.001 ***
## AMg 0.87404 0.48586 0.7377 0.001 ***
## AFe 0.74281 0.66950 0.5055 0.001 ***
## AK -0.57390 -0.81892 0.1541 0.058 .
## C_N -0.31248 -0.94993 0.1956 0.020 *
## C_P -0.97849 0.20628 0.5798 0.001 ***
## N_P -0.91898 0.39431 0.5993 0.001 ***
## Soil bulk density -0.98394 0.17852 0.2034 0.034 *
## Plant abundance -0.81619 0.57778 0.0740 0.281
## Plant basal area -0.26681 -0.96375 0.2845 0.008 **
## Plant richness -0.83938 0.54355 0.4920 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
cazy.mgm.ef.arrows<-data.frame(ef$vectors$arrows)
cazy.mgm.ef.arrows$Variable <- row.names(cazy.mgm.ef.arrows)
m1<-max(abs(env.amp.pc$Axis.1)); m2<-max(abs(env.amp.pc$Axis.2))
gp5<-ggplot(env.amp.pc) +
geom_point(mapping = aes(x = Axis.1, y = Axis.2 * -1, colour = site.latitude),size=3, alpha = 0.9) +
labs(x = sprintf("PCo1 (%.1f%%)", cazy.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", cazy.mgm.pc2))+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
ggtitle("cazy.mgm")+
geom_segment(data = cazy.mgm.ef.arrows, aes(x = 0, xend = Axis.1 * m1, y = 0, yend = Axis.2 * m2), arrow = arrow(length = unit(0.1, "cm")), colour = "grey") +
geom_text_repel(data = cazy.mgm.ef.arrows, aes(x = Axis.1 * m1, y = Axis.2 * m2, label = Variable),size = 3)+theme(axis.title = element_text(color="black",face = "bold"),axis.text = element_text(color="black",face="bold"))+theme_bw()
gp5

#ggsave("pdf2/Fig.S5e.PcoA.CAZy.composition.2023.01.05.pdf", width = 6, height = 4)
#Supplementary Fig. 2
p1<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=bac.amp.H), method ="lm", col= "white")+
labs(x = "Soil pH",y = "H'.16S")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=6.3, label="R2 = 0.481\nP = 1.653e-06",
size=5,color="black")
p1

#ggsave("pdf/Fig.1a.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$bac.amp.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40585 -0.16282 -0.04642 0.14106 0.50752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.56163 0.18483 30.090 < 2e-16 ***
## env.amp1$pH 0.21297 0.03683 5.782 1.65e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2254 on 34 degrees of freedom
## Multiple R-squared: 0.4958, Adjusted R-squared: 0.481
## F-statistic: 33.43 on 1 and 34 DF, p-value: 1.653e-06
shapiro.test(residuals(lm(env.amp1$bac.amp.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.amp.H ~ env.amp1$pH))
## W = 0.97818, p-value = 0.6835
p3<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=ko.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=ko.mgm.H), col= "white")+
labs(x = "Soil pH",y = "H'.KO")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=7.9, label="R2 = 0.199\nP = 0.009",
size=5,color="black")
p3

#ggsave("pdf/Fig.1c.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
m2<-lm(env.amp1$ko.mgm.H ~ env.amp1$pH + I(env.amp1$pH^2) )
AIC(m2)
## [1] -158.4754
summary(m2)
##
## Call:
## lm(formula = env.amp1$ko.mgm.H ~ env.amp1$pH + I(env.amp1$pH^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.055544 -0.016888 0.003081 0.019042 0.055272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.104670 0.117085 69.221 <2e-16 ***
## env.amp1$pH -0.087095 0.045769 -1.903 0.0658 .
## I(env.amp1$pH^2) 0.007155 0.004317 1.657 0.1069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02503 on 33 degrees of freedom
## Multiple R-squared: 0.245, Adjusted R-squared: 0.1992
## F-statistic: 5.354 on 2 and 33 DF, p-value: 0.009689
shapiro.test(residuals(m2))
##
## Shapiro-Wilk normality test
##
## data: residuals(m2)
## W = 0.95967, p-value = 0.21
p5<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y = rgi.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=rgi.mgm.H), method = "lm", color = "white")+
labs(x = "Soil pH",y = "H'.ARG")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=4.9, label="R2 = 0.190\nP = 0.004",
size=5,color="black")
p5

summary(lm(env.amp1$rgi.mgm.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$rgi.mgm.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.040812 -0.012858 0.000664 0.014349 0.049838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.917865 0.017772 276.712 < 2e-16 ***
## env.amp1$pH -0.010755 0.003542 -3.037 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02168 on 34 degrees of freedom
## Multiple R-squared: 0.2133, Adjusted R-squared: 0.1902
## F-statistic: 9.221 on 1 and 34 DF, p-value: 0.004571
shapiro.test(residuals(lm(env.amp1$rgi.mgm.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$rgi.mgm.H ~ env.amp1$pH))
## W = 0.98332, p-value = 0.8506
#ggsave("pdf2/Fig.2b.corr.pH.HARG.2022.12.29.pdf", width = 4, height = 3)
p7<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y = cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= pH, y=cazy.mgm.H), method = "lm", color = "white")+
labs(x = "Soil pH",y = "H'.Cazy")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=3.55, label="R2 = 0.833\nP = 5.52e-15",
size=5,color="black")
p7

summary(lm(env.amp1$cazy.mgm.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$cazy.mgm.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033579 -0.011907 -0.002831 0.011534 0.057275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.729231 0.017529 212.74 < 2e-16 ***
## env.amp1$pH -0.046286 0.003493 -13.25 5.52e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02138 on 34 degrees of freedom
## Multiple R-squared: 0.8378, Adjusted R-squared: 0.833
## F-statistic: 175.6 on 1 and 34 DF, p-value: 5.52e-15
shapiro.test(residuals(lm(env.amp1$cazy.mgm.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$cazy.mgm.H ~ env.amp1$pH))
## W = 0.97247, p-value = 0.4966
#ggsave("pdf2/Fig.2c.corr.pH.HCazy.2022.12.29.pdf", width = 4, height = 3)
p1.1<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= round(ko.mgm.H,digits = 2), y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.amp1,aes(x= ko.mgm.H, y=bac.amp.H), method ="lm", col= "white")+
labs(x = "H'.KO",y = "H'.16S")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=7.87,y=7.2, label="R2 = 0.448\nP = 4.919e-06",
size=5,color="black")
p1.1

#ggsave("pdf/Fig.1b.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.S~env.amp1$ko.mgm.S))
##
## Call:
## lm(formula = env.amp1$bac.amp.S ~ env.amp1$ko.mgm.S)
##
## Residuals:
## Min 1Q Median 3Q Max
## -791.69 -335.78 -52.81 349.68 876.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19461.2002 2955.9843 6.584 1.52e-07 ***
## env.amp1$ko.mgm.S -2.0217 0.3731 -5.418 4.92e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 492.1 on 34 degrees of freedom
## Multiple R-squared: 0.4633, Adjusted R-squared: 0.4476
## F-statistic: 29.36 on 1 and 34 DF, p-value: 4.919e-06
shapiro.test(residuals(lm(env.amp1$bac.amp.S~env.amp1$ko.mgm.S)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.amp.S ~ env.amp1$ko.mgm.S))
## W = 0.95219, p-value = 0.1225
p3.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS2, y=round(ko.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS2, y=ko.mgm.H), method = "lm",col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "H'.KO")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6,y=7.89, label="R2 = 0.003\nP = 0.721",
size=5,color="black")
p3.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS2))
##
## Call:
## lm(formula = env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.057311 -0.020494 -0.004491 0.020100 0.050093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.863868 0.019854 396.08 <2e-16 ***
## env.genomeTraits$AGS2 -0.001558 0.004321 -0.36 0.721
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02833 on 34 degrees of freedom
## Multiple R-squared: 0.003806, Adjusted R-squared: -0.02549
## F-statistic: 0.1299 on 1 and 34 DF, p-value: 0.7208
shapiro.test(residuals(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS2)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS2))
## W = 0.97055, p-value = 0.4407
p5.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS2, y=round(rgi.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS2, y=rgi.mgm.H), method = "lm",col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "H'.ARG")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6,y=4.9, label="R2 = 0.280\nP = 0.0005",
size=5,color="black")
p5.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS2))
##
## Call:
## lm(formula = env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.048815 -0.013871 0.002497 0.014775 0.035015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.811816 0.014322 335.965 < 2e-16 ***
## env.genomeTraits$AGS2 0.011924 0.003117 3.825 0.000533 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02044 on 34 degrees of freedom
## Multiple R-squared: 0.3008, Adjusted R-squared: 0.2803
## F-statistic: 14.63 on 1 and 34 DF, p-value: 0.0005334
shapiro.test(residuals(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS2)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS2))
## W = 0.96851, p-value = 0.3862
p7.1<-ggplot() + geom_jitter(data=env.genomeTraits, height = 0, aes(x= AGS2, y=cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
geom_smooth(data=env.genomeTraits,aes(x= AGS2, y=cazy.mgm.H), method = "lm",col= "white")+
labs(x = "Average Genome Size (Mbp)",y = "H'.Cazy")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6,y=3.6, label="R2 = 0.305\nP = 0.0003",
size=5,color="black")
p7.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS2))
##
## Call:
## lm(formula = env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.080577 -0.029966 0.002461 0.026830 0.081365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.381767 0.030572 110.618 < 2e-16 ***
## env.genomeTraits$AGS2 0.026902 0.006654 4.043 0.000286 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04362 on 34 degrees of freedom
## Multiple R-squared: 0.3247, Adjusted R-squared: 0.3048
## F-statistic: 16.34 on 1 and 34 DF, p-value: 0.0002864
shapiro.test(residuals(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS2)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS2))
## W = 0.97741, p-value = 0.6576
#Supplementary Fig. 3
library(linkET)
library(dplyr)
colnames(bac.amp1) == colnames(ko.mgm1)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
bac.amp2<-data.frame(t(bac.amp1))
bac.amp2<-data.frame(apply(bac.amp2,1,function(x){x/sum(x)}))
ko.mgm2<-data.frame(t(ko.mgm1))
ko.mgm2<-data.frame(apply(ko.mgm2,1,function(x){x/sum(x)}))
bac.ko<-cbind(data.frame(t(bac.amp1)),data.frame(t(ko.mgm1)))
env.mantel<-env.genomeTraits[,c("AGS2","ACN8","GC_all","Growthrate",
"bac.amp.S","bac.amp.H","ko.mgm.S","ko.mgm.H",
"latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N",
"C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]
colnames(env.mantel)<-c("AGS","ACN","GC content","Doubling time",
"S.16S","H'.16S","S.KO","H'.KO",
"Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#plot
mantel <- mantel_test(bac.ko, env.mantel,
spec_select = list(BacteriaCom = 1:22579,
FunctionCom = 22580:33644),
spec_dist = dist_func(.FUN = "vegdist", method = "bray"),
env_dist = dist_func(.FUN = "vegdist", method = "euclidean")) %>%
mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
pd = cut(p, breaks = c(-Inf, 0.01, 0.05, Inf),
labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))
qcorrplot(correlate(env.mantel,method = "spearman"),type = "lower", diag = FALSE,
grid_size = 0.25) +
geom_square() +
geom_mark(sep = '\n',size=5,sig_level = c(0.05,0.01,0.001),sig_thres = 0.05,
only_mark = TRUE)+
geom_couple(aes(colour = pd, size = rd), data = mantel, curvature = 0.1) +
#scale_fill_gradientn(colours = RColorBrewer::brewer.pal(11, "RdYlBu")) +
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
scale_size_manual(values = c(0.5, 1, 2)) +
scale_colour_manual(values = c("#f6e8c3", "#c7eae5", "#A2A2A288")) +
#scale_colour_manual(values = color_pal(3, 0)) +
guides(size = guide_legend(title = "Mantel's r",
override.aes = list(colour = "grey35"),
order = 2),
colour = guide_legend(title = "Mantel's p",
override.aes = list(size = 3),
order = 1),
fill = guide_colorbar(title = "Spearman's rho", order = 3))

#Supplementary Fig. 4
p1<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=bac.mgm.H, color = site.latitude), alpha = 0.8, size =5) +
#geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.H), method ="lm", col= "white")+
labs(x = "Soil pH",y = "H'.Bacteria.mgm")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=6.3, label="R2 = 0.009\nP = 0.253",
size=5,color="black")
p1

#ggsave("pdf2/Fig.S2A.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.H~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$bac.mgm.H ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63980 -0.18019 0.01608 0.20693 0.48888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.34786 0.23546 26.960 <2e-16 ***
## env.amp1$pH 0.05446 0.04692 1.161 0.254
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2872 on 34 degrees of freedom
## Multiple R-squared: 0.03812, Adjusted R-squared: 0.009826
## F-statistic: 1.347 on 1 and 34 DF, p-value: 0.2538
shapiro.test(residuals(lm(env.amp1$bac.mgm.H~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.mgm.H ~ env.amp1$pH))
## W = 0.98188, p-value = 0.807
library(lme4)
## 载入需要的程辑包:Matrix
##
## 载入程辑包:'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## 载入程辑包:'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
lm<-lmer(bac.amp.H~1+pH+(1|latitude),data = env.amp1,
REML = FALSE,control=lmerControl(optimizer="bobyqa"))
shapiro.test(residuals(lm))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm)
## W = 0.97453, p-value = 0.5614
summary(lm)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bac.amp.H ~ 1 + pH + (1 | latitude)
## Data: env.amp1
## Control: lmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## -0.6 5.8 4.3 -8.6 32
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8410 -0.6192 -0.1238 0.4461 1.9018
##
## Random effects:
## Groups Name Variance Std.Dev.
## latitude (Intercept) 0.009959 0.09979
## Residual 0.038044 0.19505
## Number of obs: 36, groups: latitude, 12
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.57559 0.21061 13.64329 26.47 4.01e-13 ***
## pH 0.21013 0.04194 13.72478 5.01 0.000203 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## pH -0.979
tab_model(lm)
|
|
bac.amp.H
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
5.58
|
5.15 – 6.00
|
<0.001
|
|
pH
|
0.21
|
0.12 – 0.30
|
<0.001
|
|
Random Effects
|
|
σ2
|
0.04
|
|
τ00 latitude
|
0.01
|
|
ICC
|
0.21
|
|
N latitude
|
12
|
|
Observations
|
36
|
|
Marginal R2 / Conditional R2
|
0.496 / 0.601
|
p2<-ggplot() + geom_jitter(data=env.amp1, height = 0, aes(x= pH, y=bac.mgm.S, color = site.latitude), alpha = 0.8, size =5) +
#geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.S), method ="loess", col= "white")+
labs(x = "Soil pH",y = "S.Bacteria.mgm")+
theme_bw()+
scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
theme(legend.title = element_text(colour="black", size=15, face="bold"),
legend.text = element_text(colour="black", size=15, face="bold"),
axis.text=element_text(size=15,face="bold"),
axis.title=element_text(size=15,face="bold"))+
annotate("text",x=6.5,y=33500, label="R2 = 0.0184\nP = 0.429",
size=5,color="black")
p2

#ggsave("pdf2/Fig.S2B.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.S~env.amp1$pH))
##
## Call:
## lm(formula = env.amp1$bac.mgm.S ~ env.amp1$pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -827.16 -280.83 0.17 155.35 958.18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33869.01 322.51 105.0 <2e-16 ***
## env.amp1$pH 51.42 64.27 0.8 0.429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 393.3 on 34 degrees of freedom
## Multiple R-squared: 0.01848, Adjusted R-squared: -0.01039
## F-statistic: 0.6402 on 1 and 34 DF, p-value: 0.4292
shapiro.test(residuals(lm(env.amp1$bac.mgm.S~env.amp1$pH)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(env.amp1$bac.mgm.S ~ env.amp1$pH))
## W = 0.96964, p-value = 0.4158
#Supplementary Fig. 5
env.amp1$sample_name==env.genomeTraits$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(env.amp1[,c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S")])
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(r)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(cor.out$r, 2)
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
cor.out$Y <- factor(cor.out$Y, levels = c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S"))
cor.out$Y <- factor(cor.out$Y, labels = c("H'.bacteria.mgm","S.bacteria.mgm","H'.16S","S.16S","H'.KO","S.KO","H'.ARG","S.ARG","H'.Cazy","S.Cazy"))
ggplot(cor.out, aes(Y,X)) +
geom_tile(aes(fill = r), size=1)+
geom_text(aes(label = r), size = 1.5)+
scale_fill_gradient(guide = "legend", high='green', low='blue')+
theme(axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=8, face="bold",angle = 20),
axis.text.y=element_text(colour="black", size=8, face="bold"))

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
p4<-ggplot(cor.out.sig, aes(Y,X)) +
geom_tile(aes(fill = r), size=1)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2.5, font="bold")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
theme(axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=12, face="bold"))
p4

#Supplementary Fig. 6a
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(cog.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:480] 0.2 0.15 0.03 -0.43 -0.12 -0.08 -0.01 0.24 0.58 0.49 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"COGs"
ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2, font="bold")+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue')+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(axis.title= element_blank(),
strip.text = element_text(colour="black",size = 12),
axis.text.x=element_text(colour="black", size=12, face="bold"),
axis.text.y=element_text(colour="black", size=12, face="bold"))

#ggsave("pdf2/Fig.S3A.corrHeatmap.env.cog.genes.2023.01.05.pdf", width = 10, height = 4)
#Supplementary Fig. 6b
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:221300] -0.06 -0.01 0.25 0.13 -0.03 -0.19 -0.28 -0.33 -0.31 -0.2 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"All annotated KOs"
ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 12,face="bold", color = c("black")),
#strip.background = element_rect(fill = "grey"),
panel.spacing = unit(0.1, "lines"),
axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=15, face="bold"))

#ggsave("pdf2/Fig.S3B.corrHeatmap.env.ko.genes.2023.01.05.pdf", width = 20, height = 5)
#Supplementary Fig. 7
#func <- c("Ribosome","Aminoacyl.tRNA.biosynthesis")
#nam <- "Translation"
#hi <- 15
Heatmap.genes <- function (func, nam, hi){
ko.ID<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "KEGG3")
ko.ID.sub <- data.frame(ko.ID[ko.ID$Level3 %in% func,])
ko.mgm.sub <- ko.mgm1[row.names(ko.mgm1) %in% ko.ID.sub$KO,]
ko.ID.sub <- ko.ID.sub[ko.ID.sub$KO %in% row.names(ko.mgm1) ,]
ko.ID.sub <- ko.ID.sub[order(ko.ID.sub$KO),]
row.names(ko.mgm.sub) == ko.ID.sub$KO
ko.ID.sub$KO.id <- paste0(ko.ID.sub$Level3, "_",ko.ID.sub$KO)
row.names(ko.mgm.sub) <- ko.ID.sub$KO.id
ko.mgm.sub <- ko.mgm.sub[order(ko.ID.sub$No),]
env.raw1<-env.amp1
rownames(env.raw1)<-env.raw1$sample_name
env.sel<-env.raw1[row.names(env.raw1) %in% colnames(ko.mgm.sub),]
function.sel<-data.frame(t(ko.mgm.sub))
row.names(env.sel) == row.names(function.sel)
env.sel[, paste0(func, ".RA")] <- rowSums(function.sel)
env.sel[, paste0(func, ".H")] <- vegan::diversity(function.sel)
env.sel[, paste0(func, ".S")] <- vegan::specnumber(function.sel)
cog.mgm <- data.frame(t(cog.mgm1))
row.names(cog.mgm) == row.names(env.sel)
env.sel$cog.mgm.sum <- rowSums(cog.mgm)
env.cog <- data.frame(env.sel,cog.mgm )
env.cog$rib.pct <- env.cog$J/env.cog$cog.mgm.sum
env.cog$transcript.pct <- env.cog$K/env.cog$cog.mgm.sum
env.cog$defense.pct <- env.cog$V/env.cog$cog.mgm.sum
env.tmp1<-env.cog[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp1<-data.frame(apply(env.tmp1,2,as.numeric))
env.tmp2 <- function.sel
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
#cor.out$Y
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_1 <- factor(cor.out$Y_1, levels = func)
p0<-ggplot(cor.out, aes(Y_2,X)) +
facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
geom_tile(aes(fill = r), size=1)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90),
axis.text.y=element_text(colour="black", size=12, face="bold"))
#print(p0)
cor.out.sig <-cor.out
cor.out.sig$r[cor.out.sig$p > 0.05] <- 0
p1<-ggplot(cor.out.sig, aes(Y_2,X)) +
facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
geom_tile(aes(fill = r), size=1)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1.5, font="bold")+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme_bw()+
theme(axis.title= element_blank(),
strip.text = element_text(size = 12,face="bold", color = "white"),
strip.background = element_rect(fill = "blue"),
panel.spacing = unit(0, "lines"),
axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90,vjust = 0.5),
axis.text.y=element_text(colour="black", size=12,face="bold"))
#ggsave(paste0("pdf2/Fig.S4.Heatmap2.",nam,".2023.01.05.pdf"), width = hi, height = 6)
print(p1)
}
Heatmap.genes( c("Ribosome","Aminoacyl.tRNA.biosynthesis"), "Translation", 20 )
## num [1:3280] 0.43 0.28 0.12 -0.47 -0.41 0.33 0.28 0.52 0.52 0.71 ...

#Supplementary Fig. 8
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="GBM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Arabinogalactan.biosynthesis.Mycobacterium"
## [2] "Exopolysaccharide.biosynthesis"
## [3] "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate"
## [4] "Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin"
## [5] "Glycosaminoglycan.degradation"
## [6] "Glycosphingolipid.biosynthesis.ganglio.series"
## [7] "Glycosphingolipid.biosynthesis.globo.and.isoglobo.series"
## [8] "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series"
## [9] "Glycosylphosphatidylinositol.anchor.biosynthesis"
## [10] "Lipoarabinomannan.biosynthesis"
## [11] "Lipopolysaccharide.biosynthesis"
## [12] "Mannose.type.O.glycan.biosynthesis"
## [13] "N.Glycan.biosynthesis"
## [14] "O.Antigen.nucleotide.sugar.biosynthesis"
## [15] "O.Antigen.repeat.unit.biosynthesis"
## [16] "Other.glycan.degradation"
## [17] "Other.types.of.O.glycan.biosynthesis"
## [18] "Peptidoglycan.biosynthesis"
## [19] "Teichoic.acid.biosynthesis"
## [20] "Various.types.of.N.glycan.biosynthesis"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Arabinogalactan.biosynthesis.Mycobacterium", "Exopolysaccharide.biosynthesis", "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate",
"Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin", "Glycosaminoglycan.degradation", "Glycosphingolipid.biosynthesis.ganglio.series",
"Glycosphingolipid.biosynthesis.globo.and.isoglobo.series", "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series", "Glycosylphosphatidylinositol.anchor.biosynthesis",
"Lipoarabinomannan.biosynthesis", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis",
"O.Antigen.repeat.unit.biosynthesis", "Mannose.type.O.glycan.biosynthesis",
"N.Glycan.biosynthesis","Peptidoglycan.biosynthesis","Other.glycan.degradation", "Other.types.of.O.glycan.biosynthesis", "Various.types.of.N.glycan.biosynthesis",
"Teichoic.acid.biosynthesis"))
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "ABM", "Exopolysaccharide.biosynthesis", "C",
"H", "GD", "S",
"GI", "L", "A",
"Lipoarabinomannan", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis", "ARUB","M.G",
"N.Glycan", "Peptidoglycan.biosynthesis",
"Other.glycan.D", "O", "VN.glycanB",
"Teichoic.acid.biosyn"))
p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold")) +
labs(caption="ABM: Arabinogalactan.biosynthesis.Mycobacterium;C: Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate; H: Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin; GD: Glycosaminoglycan.degradation; S: Glycosphingolipid.biosynthesis.ganglio.series; GI: Glycosphingolipid.biosynthesis.globo.and.isoglobo.series; L: Glycosphingolipid.biosynthesis.lacto.and.neolacto.series;
A: Glycosylphosphatidylinositol.anchor.biosynthesis; Lipoarabinomannan: Lipoarabinomannan.biosynthesis;ARUB: O.Antigen.repeat.unit.biosynthesis; M.G: Mannose.type.O.glycan.biosynthesis; N.Glycan: N.Glycan.biosynthesis; Other.glycan.D: Other.glycan.degradation; O: Other.types.of.O.glycan.biosynthesis; VN.glycanB: Various.types.of.N.glycan.biosynthesis,Teichoic.acid.biosyn: Teichoic.acid.biosynthesis" )
p1

#ggsave("pdf2/Fig.S6a.corrHeatmap.env.Glycan.2023.01.05.pdf", width = 30, height = 7)
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(cazy.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:2780] 0.24 0.18 -0.22 -0.48 -0.11 0.09 0.12 0.35 0.51 0.61 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Carbohydrate-active enzymes (CAZy)"
p2<-ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_text(data= cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1.5, font="bold")+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=18, face="bold"))
p2

#ggsave("pdf2/Fig.S6b.corrHeatmap.env.cazy.genes.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 9
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="CM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Bacterial.chemotaxis" "Flagellar.assembly"
## [3] "Regulation.of.actin.cytoskeleton"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))
p1<- ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=10, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=15, face="bold")) +
labs(caption="RAC: Regulation of actin cytoskeleton")
p1

#ggsave("pdf2/Fig.S7a.Heatmap.KEGG.cellular.motility.2023.01.05.pdf", width = 20, height = 6)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="Two component system",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Two.component.system"
#cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))
p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue')+
theme(strip.text = element_text(size = 12,face="bold",color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1,vjust = 0.5),
axis.text.y=element_text(colour="black", size=15, face="bold"))
p2

#ggsave("pdf2/Fig.S7b.corrHeatmap.env.Signal.transduction.2023.01.05.pdf", width = 20, height = 6)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:83460] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility", "Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Cellular.community.prokaryotes","Pentose.and.glucuronate.interconversions",
"Translation", "Energy.metabolism", "Membrane.transport" , "Folding.sorting.and.degradation",
"Metabolism.of.cofactors.and.vitamins" ))
cor.out$Y_4 <- factor(cor.out$Y_4, labels = c("BSS","CM", "XenoBDM","Signal transduction","Cellular community","PGI",
"Translation", "Energy metabolism", "Membrane transport" , "FSD",
"Metabolism of cofactors and vitamins" ))
cor.out.sub<-cor.out[cor.out$Y_4=="Cellular community",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Biofilm.formation" "Quorum.sensing"
p3<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue')+
theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1),
axis.text.y=element_text(colour="black", size=15, face="bold"))
p3

#ggsave("pdf2/Fig.S7c.corrHeatmap.env.Cellular.community.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 10
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="MTP",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Biosynthesis.of.12.14.and.16.membered.macrolides"
## [2] "Biosynthesis.of.ansamycins."
## [3] "Biosynthesis.of.enediyne.antibiotics."
## [4] "Biosynthesis.of.siderophore.group.nonribosomal.peptides."
## [5] "Biosynthesis.of.type.II.polyketide.backbone."
## [6] "Biosynthesis.of.type.II.polyketide.products."
## [7] "Biosynthesis.of.vancomycin.group.antibiotics."
## [8] "Carotenoid.biosynthesis."
## [9] "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis."
## [10] "Geraniol.degradation."
## [11] "Insect.hormone.biosynthesis."
## [12] "Limonene.and.pinene.degradation."
## [13] "Monoterpenoid.biosynthesis."
## [14] "Nonribosomal.peptide.structures."
## [15] "Polyketide.sugar.unit.biosynthesis."
## [16] "Sesquiterpenoid.and.triterpenoid.biosynthesis."
## [17] "Terpenoid.backbone.biosynthesis."
## [18] "Tetracycline.biosynthesis."
## [19] "Type.I.polyketide.structures."
## [20] "Zeatin.biosynthesis."
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Biosynthesis.of.12.14.and.16.membered.macrolides", "Biosynthesis.of.ansamycins.", "Biosynthesis.of.enediyne.antibiotics.",
"Biosynthesis.of.siderophore.group.nonribosomal.peptides.", "Biosynthesis.of.type.II.polyketide.backbone.", "Biosynthesis.of.type.II.polyketide.products.",
"Biosynthesis.of.vancomycin.group.antibiotics.", "Carotenoid.biosynthesis.", "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis.",
"Geraniol.degradation.", "Insect.hormone.biosynthesis.", "Limonene.and.pinene.degradation.",
"Monoterpenoid.biosynthesis.", "Nonribosomal.peptide.structures.",
"Polyketide.sugar.unit.biosynthesis.","Sesquiterpenoid.and.triterpenoid.biosynthesis.","Terpenoid.backbone.biosynthesis.", "Tetracycline.biosynthesis.", "Type.I.polyketide.structures.",
"Zeatin.biosynthesis."))
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "BMM", "Biosynthesis.of.ansamycins", "Biosynthesis.of.enediyne.antibiotics",
"Biosynthesis.of.siderophore", "BPB", "Biosyn.polyketide",
"Biosyn.vancomycin", "Carotenoid.biosynthesis", "D","Geraniol.D",
"IH", "LPD", "M", "Nonribosomal.P","Polyketide.sugar.unit.biosynthesis",
"STB", "Terpenoid.backbone.biosynthesis",
"TB", "Type.I.polyketide.structures", "ZB"))
p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue')+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold")) +
labs(caption="BBM: Biosynthesis.of.12.14.and.16.membered.macrolides;Biosynthesis.of.siderophore: Biosynthesis.of.siderophore.group.nonribosomal.peptides; BPB: Biosynthesis.of.type.II.polyketide.backbone; Biosyn.polyketide: Biosynthesis.of.type.II.polyketide.products; Biosyn.vancomycin: Biosynthesis.of.vancomycin.group.antibiotics;
D: Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis; Geraniol.D: Geraniol.degradation; IH: Insect.hormone.biosynthesis; LPD: Limonene.and.pinene.degradation; M: Monoterpenoid.biosynthesis; Nonribosomal.P: Nonribosomal.peptide.structures; TB: Terpenoid.backbone.biosynthesis; ZB: Zeatin.biosynthesis" )
p1

#ggsave("pdf2/Fig.S8a.corrHeatmap.env.Metabolism.Terpenoid.2023.01.05.pdf", width = 30, height = 7)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="XenoBDM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Aminobenzoate.degradation"
## [2] "Atrazine.degradation"
## [3] "Benzoate.degradation"
## [4] "Bisphenol.degradation"
## [5] "Caprolactam.degradation"
## [6] "Chloroalkane.and.chloroalkene.degradation"
## [7] "Chlorocyclohexane.and.chlorobenzene.degradation"
## [8] "Dioxin.degradation"
## [9] "Drug.metabolism.cytochrome.P450"
## [10] "Drug.metabolism.other.enzymes"
## [11] "Ethylbenzene.degradation"
## [12] "Fluorobenzoate.degradation"
## [13] "Furfural.degradation"
## [14] "Metabolism.of.xenobiotics.by.cytochrome.P450"
## [15] "Naphthalene.degradation"
## [16] "Nitrotoluene.degradation"
## [17] "Polycyclic.aromatic.hydrocarbon.degradation"
## [18] "Steroid.degradation"
## [19] "Styrene.degradation"
## [20] "Toluene.degradation"
## [21] "Xylene.degradation"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "Atrazine.degradation", "Benzoate.degradation",
"Bisphenol.degradation", "Caprolactam.degradation", "Chloroalkane.and.chloroalkene.degradation",
"Chlorocyclohexane.and.chlorobenzene.degradation", "Dioxin.degradation", "Drug.metabolism.cytochrome.P450",
"Drug.metabolism.other.enzymes", "Ethylbenzene.degradation", "Fluorobenzoate.degradation",
"Furfural.degradation", "Metabolism.of.xenobiotics.by.cytochrome.P450",
"Naphthalene.degradation", "Nitrotoluene.degradation", "Polycyclic.aromatic.hydrocarbon.degradation",
"Steroid.degradation", "Styrene.degradation", "Toluene.degradation",
"Xylene.degradation"))
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "AtrD", "Benzoate.degradation",
"B", "CapD", "ChloroalkaneD",
"ChlcbD", "DioxinD", "Dc",
"DrugO", "EbD", "FbD",
"FfD", "Mxc", "NaD",
"NiD", "PcahD", "SteroidD",
"StyreneD", "TolueneD", "XyleneD"))
p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold")) +
labs(caption="AtrD: Atrazine.degradation; B: Bisphenol.degradation; CapD: Caprolactam.degradation; ChlcbD: Chloroalkane.and.chloroalkene.degradation; ChlorocyclohexaneD: Chlorocyclohexane.and.chlorobenzene.degradation; DioxinD: Dioxin.degradation; Dc: Drug.metabolism.cytochrome.P450.; DrugO: Drug.metabolism.other.enzymes; EbD: Ethylbenzene.degradation; FbD: Fluorobenzoate.degradation; FfD: Furfural.degradation;
Mxc: Metabolism.of.xenobiotics.by.cytochrome; NaD: Naphthalene.degradation; NiD: Nitrotoluene.degradation; PcahD: Polycyclic.aromatic.hydrocarbon.degradation; SD: Steroid.degradation; StyreneD: Styrene.degradation; TolueneD: Toluene.degradation; XyleneD: Xylene.degradation" )
p2

#ggsave("pdf2/Fig.S8b.corrHeatmap.env.XenoBDM.2023.01.05.pdf", width = 30, height = 7)
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(rgi.mgm1))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:21120] -0.38 -0.28 -0.28 0.17 0.48 -0.23 -0.1 -0.19 -0.32 -0.35 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Antibiotic resistance genes (ARG)"
p3<-ggplot(cor.out.sig, aes(Y,X)) +
facet_grid(.~ Y_2, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "red"),
panel.spacing = unit(0.1, "lines"),
axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=18, face="bold"))
p3

#ggsave("pdf2/Fig.S8c.corrHeatmap.env.ARG.genes.2023.01.09.pdf", width = 30, height = 7)
#Supplementary Fig. 11
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="Energy metabolism",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Carbon.fixation.pathways.in.prokaryotes"
## [2] "Methane.metabolism"
## [3] "Nitrogen.metabolism"
## [4] "Oxidative.phosphorylation"
## [5] "Sulfur.metabolism"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Carbon.fixation", "Methane.metabolism", "Nitrogen.M", "Oxidative.phosphorylation", "Sulfur.metabolism" ))
p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "blue"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=8, face="bold")) +
labs(caption="Nitrogen.M: Nitrogen metabolism")
p1

#ggsave("pdf2/Fig.S9a.corrHeatmap.env.Energy.metabolism.2023.01.05.pdf", width = 10, height = 3)
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)
ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
"TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
"AllP_abu","AllP_bsa","AllP_rich")]
env.tmp2 <- data.frame(t(ko.sel))
spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
## num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X,
levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
"AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X,
labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
"Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
"Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio",
"Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)
cor.out<-cSplit(cor.out, "Y", "_")
cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
"Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))
cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
"Energy metabolism","Membrane transport","CC","GDM","MAC"))
cor.out.sub<-cor.out[cor.out$Y_4=="Membrane transport",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "ABC.transporters" "Phosphotransferase.system"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("ABC.transporters" , "PS" ))
p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
facet_grid(.~ Y_5, scales = "free", space = "free" )+
geom_tile(aes(fill = r), size=0)+
geom_hline(yintercept = c(8.5,9.5), color = "red")+
scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
strip.background = element_rect(fill = "blue"),
panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y=element_text(colour="black", size=8, face="bold")) +
labs(caption="PS: Phosphotransferase.system")
p2

#ggsave("pdf2/Fig.S9b.corrHeatmap.env.Membrane.transport.2023.01.05.pdf", width = 10, height = 3)
R Markdown